#!/bin/bash
#SBATCH --job-name=prsice2
#SBATCH --account=gem
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=32gb
#SBATCH --time=24:00:00

#code to clump at r2=0.01 using a 10MB LD-window in 1000 Genomes (European population, N=489)
#GWAS summary statistics fields: CHR,BP,SNP,A1,A2,N,SE,P,OR|BETA,INFO,MAF
#example with 1000 Genomes genotype files (QC_1000G_P3*): VCF files downloaded from https://www.internationalgenome.org/category/genotypes/
#plink (v1.9b) command to convert vcf to bed files (plink --vcf vcf_name --make-bed --out QC_1000G_P3)

#parameters of code (positional arguments, in this order)
output_folder="$1" #path to output directory
name="$2"          #name of trait. it should be the same as in the basefile (read from ./Data/<name>.txt)
binary="$3"        #true (binary traits) or false (continuous traits)
stats="$4"         #OR (binary traits) or BETA (continuous traits); column name of the effect size in the basefile

# Run PRSice-2 on the base summary statistics (./Data/<name>.txt) against the
# 1000 Genomes target (./Data/QC_1000G_P3), clumping at r2=0.01 and scoring
# only at the genome-wide significance threshold (5e-08). No regression is run.
# Results are written with the prefix <output_folder>/<name>_1000G_0.01.

# Fail fast if any of the four required arguments is missing; otherwise
# PRSice would be launched with paths such as ./Data/.txt.
if [[ -z "${output_folder:-}" || -z "${name:-}" || -z "${binary:-}" || -z "${stats:-}" ]]; then
  printf 'Usage: %s <output_folder> <name> <binary: true|false> <stats: OR|BETA>\n' "${0##*/}" >&2
  exit 2
fi

# The arguments are collected in an array so that each option can carry its
# own comment. (A comment placed before a trailing backslash disables the
# line continuation and silently truncates the command.)
prsice_args=(
  --prsice ./PRSice_linux
  --base "./Data/${name}.txt"
  --snp SNP
  --a1 A1
  --pvalue P
  --target ./Data/QC_1000G_P3 # EUR 1000 Genomes genotype (plink bed/bim/fam prefix)
  --binary-target "${binary}"
  --pheno ./Data/QC_1000G_P3.fam
  --bar-levels 5e-08
  --keep-ambig
  --clump-kb 5M # NOTE(review): presumably 5M on each side of the index SNP gives the 10MB window - confirm
  --clump-r2 0.01
  --fastscore
  --stat "${stats}"
  --all-score
  --print-snp
  --score sum
  --no-regress
  --out "${output_folder}/${name}_1000G_0.01"
)

# Tell PRSice how to interpret the effect-size column: --beta only for
# continuous traits (BETA), --or for binary traits (OR). For any other
# column name neither flag is passed and PRSice is left to decide.
case "${stats}" in
  BETA | beta) prsice_args+=(--beta) ;;
  OR | or) prsice_args+=(--or) ;;
esac

Rscript ./PRSice.R "${prsice_args[@]}"


